## Load libraries----
options(device = "quartz")
rm(list = ls())
library(grf)
library(xtable)
library(tictoc)
library(MDEI)
library(foreign)
library(gam)
library(splines2)
library(gridExtra)


## Original analysis from BH ----
d <- read.dta("1998_2002.dta")
# reduce dataset to relevant variables
varnames <-
  c(
    "wkr",
    "bula",
    "wkrname",
    "Flooded",
    "del_spd_z_vs",
    "near_dist_allrivers",
    "ddpopdensity_",
    "ddshforeign_",
    "ddpopnetinp1000_",
    "ddshpop_o60_",
    "ddue_",
    "ddshagric_",
    "ddshmanu_",
    "ddshtradservice_",
    "ddshotherservice_",
    "ddsinc_SPD"
  )
d <- na.omit(d[, varnames])

# fit GAM
gam.out     <-
  gam(
    del_spd_z_vs  ~ s(near_dist_allrivers, 4) + ddpopdensity_ + ddshforeign_ +
      ddpopnetinp1000_ + ddshpop_o60_ + ddue_
    + ddshagric_ + ddshmanu_ + ddshtradservice_ + ddshotherservice_ +
      ddsinc_SPD,
    data = d
  )

maxdist <- max(d$near_dist_allrivers[d$Flooded == 1])
mindist <- max(d$near_dist_allrivers[d$Flooded == 1])

minpop <- min(d$ddpopdensity_[d$Flooded == 1])
trim.dist <- (d$near_dist_allrivers <= maxdist)

# Fit lm
lm.out     <-
  lm(
    del_spd_z_vs  ~ Flooded + ddpopdensity_ + ddshforeign_ + ddpopnetinp1000_ +
      ddshpop_o60_ + ddue_ + ddshagric_ + ddshmanu_ + ddshtradservice_ + ddshotherservice_ +
      ddsinc_SPD,
    data = d
  )


# Fit trimmed lm
lm.trim     <-
  lm(
    del_spd_z_vs  ~ Flooded + ddpopdensity_ + ddshforeign_ + ddpopnetinp1000_ +
      ddshpop_o60_ + ddue_ + ddshagric_ + ddshmanu_ + ddshtradservice_ + ddshotherservice_ +
      ddsinc_SPD,
    data = d,
    subset = d$near_dist_allrivers <= maxdist
  )


# Fit trimmed gam
gam.trim  <-
  gam::gam(
    del_spd_z_vs  ~ Flooded + s(near_dist_allrivers) + ddpopdensity_ + ddshforeign_ +
      ddpopnetinp1000_ + ddshpop_o60_ + ddue_
    + ddshagric_ + ddshmanu_ + ddshtradservice_ + ddshotherservice_ +
      ddsinc_SPD,
    data = d,
    subset = trim.dist == 1
  )

## bootstrap ses for trimmed gam
set.seed(1)

gam.boot.coef <- NULL
n.boot <- sum(trim.dist == 1)
for (i.boot in 1:1000) {
  set.seed(i.boot)
  boot.samp <- sample(which(trim.dist == 1), n.boot, replace = TRUE)
  gam.boot  <-
    gam::gam(
      del_spd_z_vs  ~ Flooded + s(near_dist_allrivers) + ddpopdensity_ + ddshforeign_ +
        ddpopnetinp1000_ + ddshpop_o60_ + ddue_
      + ddshagric_ + ddshmanu_ + ddshtradservice_ + ddshotherservice_ +
        ddsinc_SPD,
      data = d[boot.samp, ]
    )
  gam.boot.coef[i.boot] <- gam.boot$coef[2]
}

## Implement using MDEI ----

# Set up data
# Indicator for affected regions
pp <- c(0:3, 5, 6, 8)
sname <-
  c(
    "Schleswig-Holstein",
    "Niedersachsen",
    "Brandenburg",
    "Mecklenburg-Vorpommern",
    "Sachsen",
    "Sachsen-Anhalt",
    "Th\x9fringen "
  )
d$affected <- 1 * (d$bula %in% sname)
y <- d$del_spd_z_vs
## All of these variables come from the vector in the original analysis, having 
# added in the affected indicator generated above
vars.control <-
  c(
    "ddpopdensity_",
    "ddshforeign_" ,
    "ddpopnetinp1000_" ,
    "ddshpop_o60_" ,
    "ddue_"  ,
    "ddshagric_" ,
    "ddshmanu_" ,
    "ddshtradservice_" ,
    "ddshotherservice_" ,
    "ddsinc_SPD",
    "Flooded",
    "affected"
  )
treat <- d$near_dist_allrivers
X <- as.matrix(d[, vars.control])


# Run MDEI
set.seed(1)
s1 <- MDEI(
  y = y,
  treat = treat,
  X = X,
  splits = 20
)#
fits.CIs <- s1$CIs.theta + s1$Ey.x

## Second analysis
vars.control2 <-
  c(
    "ddpopdensity_",
    "ddshforeign_" ,
    "ddpopnetinp1000_" ,
    "ddshpop_o60_" ,
    "ddue_"  ,
    "ddshagric_" ,
    "ddshmanu_" ,
    "ddshtradservice_" ,
    "ddshotherservice_" ,
    "ddsinc_SPD",
    "near_dist_allrivers",
    "affected"
  )
treat2 <- d$Flooded
X2 <- d[, vars.control2]
X.bh <- model.matrix(lm.out)[, -c(1:2)]

# Run grf
set.seed(1)
g1 <- causal_forest(
  X = X.bh,
  Y = y,
  W = treat2,
  num.trees = 20000
)
average_treatment_effect(g1, target.sample = "treated")
ate_g <- average_treatment_effect(g1, target.sample = "treated")
grf.output <- c(ate_g[1], c(-ate_g[2], ate_g[2]) * 1.96 + ate_g[1])
grf.latex <-
  c(round(grf.output[1], 2),
    paste("(", grf.output[2], ", ", grf.output[3], ")", sep = ""))
tic()

# Our analysis w binary treatment
set.seed(1)
s0 <- MDEI(
  y = y,
  treat = treat2,
  X = X.bh,
  splits = 20,
  alpha = .95
)
toc()

## Estimate average
#point
point.ci <- function(obj, sub1) {
  s0 <- obj
  treat2 <- sub1
  p0 <- mean(s0$tau.est[treat2 == 1])
  # CIs
  CI.width.0 <-
    ((colMeans(s0$CIs.tau[treat2 == 1, ] - s0$tau[treat2 == 1])) ^ 2 / sum(treat2 ==
                                                                             1))[1] ^ .5 / 2 ^ .5
  output <- list(c(
    "point" = p0,
    "CI" = c(p0 - CI.width.0, p0 + CI.width.0)
  ))
  return(output)
}

# CIs
CI.width.0 <-
  ((colMeans(s0$CIs.tau[treat2 == 1, ] - s0$tau[treat2 == 1])) ^ 2 / sum(treat2 ==
                                                                           1))[1] ^ .5



##Figure 6 in the paper----
adj.pdf <- 1.2
pdf("Figure6.pdf",
    h = 4 * adj.pdf,
    w = 8 * adj.pdf)

gg_color <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

par(mfrow = c(1, 2))

par(
  mar = c(2.25, 2.25, .5, .7),
  # Dist' from plot to side of page
  mgp = c(2, 0.4, 0),
  # Dist' plot to label
  las = 1,
  # Rotate y-axis text
  tck = -.01,
  # Reduce tick length
  xaxs = "i",
  yaxs = "i",
  # Remove plot padding
  oma = c(.3, .3, .3, .3)
)

y.gam <-
  predict(gam.out,
          se = T,
          terms = "s(near_dist_allrivers, 4)",
          type = "terms")
ylim.use <- range(c(s1$fit, y.gam$fit, fits.CIs))

p1 <-
  plot(
    d$near_dist_allrivers,
    y.gam$fit,
    ylim = ylim.use,
    type = "n",
    xlab = "",
    ylab = ""
  )

mtext(
  side = 1,
  "Distance to Elbe or Flooded Tributary (km)",
  font = 2,
  line = 1.3
)
mtext(
  side = 2,
  "GAM Residual: Change in SPD PR Vote Share",
  font = 2,
  line = 1.8,
  las = 0
)


#rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "gray90")
abline(h = (-15:10) * 5, col = "white")
abline(v = (1:1000) * 50, col = "white")
sort.ind <- sort(treat, ind = T)$ix
lines(treat[sort.ind], y.gam$fit[sort.ind])
lines(treat[sort.ind], (y.gam$fit + 2 * y.gam$se.fit)[sort.ind], lty = 2)
lines(treat[sort.ind], (y.gam$fit - 2 * y.gam$se.fit)[sort.ind], lty = 2)


of   <- predict(gam.out, type = "terms")
pres <- residuals(gam.out) + of[, 1]


sname <-
  c(
    "Schleswig-Holstein",
    "Niedersachsen",
    "Brandenburg",
    "Mecklenburg-Vorpommern",
    "Sachsen",
    "Sachsen-Anhalt",
    "Th\x9fringen"
  )
for (i in 1:length(sname)) {
  yyy <- pres[d$bula == sname[i]]
  xxx <- d$near_dist_allrivers[d$bula == sname[i]]
  mod1 <- lm(yyy ~ xxx)
  print(summary(mod1))
  rrange <- seq(min(xxx), max(xxx), by = 12)
  locfit <- predict(mod1, newdata = data.frame(xxx = rrange))
  points(
    rrange,
    locfit,
    col = gg_color(10)[4],
    pch = 1,
    type = "o",
    cex = .75
  )
}


points(
  y = pres,
  x = treat,
  pch = 20,
  col = sapply(
    d$Flood,
    FUN = function(z)
      ifelse(z, "black", "gray70")
  )
)
legend(
  "topright",
  legend = c(
    "GAM Main-Effect Function",
    "Pointwise CIs (2xSE)",
    "Flooded",
    "Other Districts",
    "Flooded Region"
  ),
  pch = c(NA, NA, 20, 20, 1),
  lty = c(1, 3, NA, NA, 1),
  col = c("black", "black", "black", "gray70", gg_color(10)[4]),
  cex = .75,
  bty = "n"
)





plot(
  treat,
  s1$Ey.x + s1$tau.est,
  main = "",
  pch = ifelse(d$Flooded, 19, 1),
  type = "n",
  xlab = "",
  ylab = "",
  xlim = range(c(treat)),
  ylim = ylim.use
)
#rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "gray90")
abline(h = (-15:10) * 5, col = "white")
abline(v = (1:1000) * 50, col = "white")

#points(treat,pres,col="red")
sig1 <- apply(fits.CIs, 1, prod) > 0
segments(
  x0 = treat[d$Flood != 0],
  x1 = treat[d$Flood != 0],
  y0 = fits.CIs[d$Flood != 0, 1],
  y1 = fits.CIs[d$Flood != 0, 2],
  col = sapply(
    sig1[d$Flood != 0],
    FUN = function(z)
      ifelse(z, gg_color(2)[1], gg_color(2)[2])
  ),
  lwd = 1.5
)

sname <-
  c(
    "Schleswig-Holstein",
    "Niedersachsen",
    "Brandenburg",
    "Mecklenburg-Vorpommern",
    "Sachsen",
    "Sachsen-Anhalt",
    "Th\x9fringen"
  )
for (i in 1:length(sname)) {
  yyy <- (s1$Ey.x + s1$tau.est)[d$bula == sname[i]]
  xxx <- d$near_dist_allrivers[d$bula == sname[i]]
  mod1 <- lm(yyy ~ xxx)
  print(summary(mod1))
  rrange <- seq(min(xxx), max(xxx), by = 12)
  locfit <- predict(mod1, newdata = data.frame(xxx = rrange))
  points(
    rrange,
    locfit,
    col = gg_color(10)[4],
    pch = 1,
    type = "o",
    cex = .75
  )
}



points(
  y = s1$Ey.x + s1$tau.est,
  x = treat,
  pch = 20,
  col = sapply(
    d$Flood,
    FUN = function(z)
      ifelse(z, "black", "gray70")
  )
)

mtext(
  side = 1,
  "Distance to Elbe or Flooded Tributary (km)",
  font = 2,
  line = 1.3
)
mtext(
  side = 2,
  "Expected Change in SPD PR Vote Share",
  font = 2,
  line = 1.8,
  las = 0
)

legend.txt <-
  c(
    "Flooded",
    "Not Flooded",
    "Significant (95%)",
    "Not Significant (95%)",
    "Flooded Region"
  )
legend(
  "topright",
  legend.txt,
  bty = "n",
  col = c("black", "gray70", gg_color(2), gg_color(10)[4]),
  pch = c(19, 19, 124, 124, 1),
  lty = c(rep(NA, 4), 1)
)
#mtext(side=3,"Change in SPD PR Vote Share 2002-1998 and Distance to Elbe ",font=2,line=.8,cex=1.25)

dev.off()



##Figure 7 in the paper: Marginal effect----
pdf("Figure7.pdf",
    h = 4 * adj.pdf,
    w = 4 * adj.pdf)
par(
  mar = c(2.25, 2.25, .5, .7),
  # Dist' from plot to side of page
  mgp = c(2, 0.4, 0),
  # Dist' plot to label
  las = 1,
  # Rotate y-axis text
  tck = -.01,
  # Reduce tick length
  xaxs = "i",
  yaxs = "i",
  # Remove plot padding
  oma = c(.7, .7, .7, .7)
)
plot(
  treat,
  s1$tau.est,
  ylim = range(c(s1$CIs.tau, 4)),
  main = "",
  pch = ifelse(d$Flooded, 19, 1),
  type = "n",
  xlab = "",
  ylab = "",
  xlim = range(c(treat))
)
#rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "gray90")
abline(h = (-15:10), col = "white")
abline(v = (1:1000) * 50, col = "white")

#points(treat,pres,col="red")
sig1 <- apply(s1$CIs.tau, 1, prod) > 0
segments(
  x0 = treat,
  x1 = treat,
  y0 = s1$CIs.tau[, 1],
  y1 = s1$CIs.tau[, 2],
  col = sapply(
    sig1,
    FUN = function(z)
      ifelse(z, gg_color(2)[1], gg_color(2)[2])
  ),
  lwd = 1.5
)
points(
  y = s1$tau.est,
  x = treat,
  pch = 20,
  col = sapply(
    d$Flood,
    FUN = function(z)
      ifelse(z, "black", "gray70")
  )
)
mtext(
  side = 1,
  "Distance to Elbe or Flooded Tributary (km)",
  font = 2,
  line = 1.9
)
mtext(
  side = 2,
  "Estimated Effect of Distance on Vote Share",
  font = 2,
  line = 1.9,
  las = 0
)

legend.txt <-
  c("Flooded",
    "Not Flooded",
    "Significant (90%)",
    "Not Significant (90%)")
legend(
  "topleft",
  legend.txt,
  bty = "n",
  col = c("black", "gray70", gg_color(2)),
  pch = c(19, 19, 124, 124)
)
#mtext(side=3,"Change in SPD PR Vote Share 2002-1998 and Distance to Elbe ",font=2,line=.8,cex=1.25)

dev.off()

## Table of results ----

make.ci <- function(obj) {
  sum1 <- summary(obj)
  point1 <- sum1$coef[2, 1]
  ses <- sum1$coef[2, 2]
  out1 <- c(point1, point1 - 1.96 * ses, point1 + 1.96 * ses)
  return(out1)
}

## Table of results ----
out.tab <-
  rbind(
    make.ci(lm.out),
    unlist(point.ci(s0, treat2)),
    make.ci(lm.trim),
    grf.output,
    c(gam.trim$coef[2], quantile(gam.boot.coef, c(0.025, 0.975)))
  )

rownames(out.tab) <- c("Original Regression",
                       "Trimmed Regression",
                       "MDEI",
                       "GRF",
                       "GAM")

xtable(out.tab)
# Printing table for convenience
# NOTE the bootstrapped ses on the gam do not match
# what is in the manuscript, so we will change what is in the manuscript
# (this change affects none of our argument).
pdf("table1.pdf", width = 4, height = 5)
grid.table(round(out.tab, 2))
dev.off()